Report last updated Tue May 31 14:56:22 2016.

library(ggplot2)
library(reshape)
library(limma)
library(CHBUtils)
library(biomaRt)
library(dplyr)
library(cluster)
library(gridExtra)
library(logging)
library(org.Mm.eg.db)
library(DEGreport)
library(isomiRs)
library(SummarizedExperiment)
library(dplyr)
order_group=c("normal", "day3", "day7", "day14")
# root_path = "~/orch/scratch/vishal_mirna_kidney/publish/UUO-model"
result_files = file.path(root_path, "omics/files_publish")
dir.create(result_files, showWarnings = F, recursive = T)
basicConfig(level='INFO')

Protein analysis

FC > 4 and FDR < 1%

protein_file = file.path(root_path, "protein", "files_publish")
dir.create(protein_file, recursive = T, showWarnings = F)
counts = read.csv(file.path(root_path, "protein", "UUO_Proteomics_RawData.csv"),skip = 1) %>% tidyr::separate(Protein.Id, c("sp", "Uniprot.ID", "type"),sep = "[::|::]", extra = "merge")
row.names(counts) = counts$Uniprot.ID

mapping = data.frame(id=rownames(counts))
mapping = annotate_df(mapping, "id", 'mmusculus_gene_ensembl', "uniprot_genename", "ensembl_gene_id")  %>% distinct(ensembl_gene_id)

mapping2 = data.frame(id=unique(counts$Gene.Symbol)[!is.na(unique(counts$Gene.Symbol))])
mapping2 = annotate_df(mapping2, "id", 'mmusculus_gene_ensembl', "wikigene_name", "ensembl_gene_id")  %>% distinct(ensembl_gene_id)

counts[as.character(mapping$id), "ensembl"] = mapping$ensembl_gene_id 
idx = match(as.character(mapping2$id),as.character(counts$Gene.Symbol))
counts[idx, "ensembl"] = mapping2$ensembl_gene_id
counts$symbol = convertIDs(as.character(counts$ensembl), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
save_file(counts, "counts_annotated.csv", protein_file)

counts = counts %>% distinct(ensembl) %>% filter(!is.na(ensembl))
clean_counts = counts[,9:18]
rownames(clean_counts) = counts$ensembl
names(clean_counts) = sapply(names(clean_counts), tolower)

cat("## Expression density of 'raw' data\n\n")

Expression density of ‘raw’ data

ggplot(melt(clean_counts), aes(x=log2(value), colour=variable)) + geom_density()

dge = edgeR::DGEList(clean_counts)
dge = edgeR::calcNormFactors(dge, method="TMM")

coldata = data.frame(row.names=colnames(clean_counts), samples=colnames(clean_counts)) %>% tidyr::separate(samples,into = c("group"), extra = "drop")
coldata$group = sapply(coldata$group, tolower)
norm_counts = edgeR::cpm(dge, log=T)

MA <- normalizeBetweenArrays(as.matrix(norm_counts), method="none")

cat("## Expression density of normalize data\n\n")

Expression density of normalize data

ggplot(melt(as.data.frame(MA)), aes(x=value, colour=variable)) + geom_density()

cat("## MDS plot\n\n")

MDS plot

mds(MA, condition = coldata$group)

save_file(MA, "uuo_model_log2_counts.tsv", protein_file)
design <- model.matrix(~ 0 + coldata$group)
colnames(design) = c("day14", "day3", "day7", "normal")
fit <- lmFit(MA, design)
contrast.matrix <- makeContrasts(day3-normal, day7-day3, day14-day7, day14-normal, day14-day3, day7-normal, levels=design)
fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2)
res = topTable(fit2, adjust="BH", n="Inf")
save_file(res, "uuo_model.tsv", protein_file)
absMaxFC = rowMax(as.matrix(abs(res[,1:6])))
sign = row.names(res[absMaxFC>4 & res$adj.P.Val<0.01,])

cat("## MDS plot of DE proteins\n\n")

MDS plot of DE proteins

mds(MA[sign,], condition = coldata$group)

coldata$group = factor(coldata$group, levels=order_group)
cat("## Clustering analysis\n\n")

Clustering analysis

clusters = degPatterns(MA[sign,], coldata, minc = 15, summarize = "group", time="group", col = NULL)

Working with 92 genes

.df=clusters$df
save_file(.df, "clusters_genes_cluster.tsv", protein_file)
name_res = compress_results(protein_file, prefix = "uuo_model_prot_ournorm", zip = TRUE)

3D interaction analysis

mrna_path = "rnaseq/final/2016-02-03_mrna/files"
mrna_matrix = read.csv(file.path(root_path, mrna_path, "uuo_model_log2_counts.tsv"), row.names = 1)
mrna_clusters = read.csv(file.path(root_path, mrna_path, "clusters_genes.tsv"), row.names = 1)
mrna_col = data.frame(row.names=colnames(mrna_matrix), samples=colnames(mrna_matrix)) %>% tidyr::separate(samples,into = c("group"), extra = "drop") %>% mutate(group=factor(group, levels=order_group))
rownames(mrna_col) = colnames(mrna_matrix)
    
prot_path = "protein/files_publish"
prot_matrix = read.csv(file.path(root_path, prot_path, "uuo_model_log2_counts.tsv"), row.names = 1)
prot_clusters = read.csv(file.path(root_path, prot_path, "clusters_genes_cluster.tsv"), row.names = 1)
prot_col = data.frame(row.names=colnames(prot_matrix), samples=colnames(prot_matrix)) %>% tidyr::separate(samples,into = c("group"), extra = "drop") %>% mutate(group=factor(group, levels=order_group))
rownames(prot_col) = colnames(prot_matrix)

mirna_path = "srnaseq/final/files_publish"
mirna_matrix = read.csv(file.path(root_path, mirna_path, "uuo_model_mirna_log2_counts.tsv"), row.names = 1)
load(file.path(root_path, mirna_path, "ma.rda"))
mirna_clusters = read.csv(file.path(root_path, mirna_path, "go_terms_network.tsv"))
mirna_col = data.frame(row.names=colnames(mirna_matrix), samples=colnames(mirna_matrix)) %>% tidyr::separate(samples,into = c("group"), extra = "drop") %>% mutate(group=factor(group, levels=order_group))
rownames(mirna_col) = colnames(mirna_matrix)
#mrna_cluster = annotate_df(mrna_clusters, "genes", 'mmusculus_gene_ensembl', "ensembl_gene_id", "gene_biotype")
mrna_keep = as.character(mrna_clusters$genes)
prot_keep = unique(c(intersect(mrna_keep, rownames(prot_matrix)), as.character(prot_clusters$genes)))
keep = intersect(mrna_keep, rownames(prot_matrix))

df = degMerge(list(mrna=mrna_matrix[keep,], prot=prot_matrix[keep,]), 
                   list(mrna=mrna_clusters), 
                   list(mrna=mrna_col, prot=prot_col), 
                   summarize="group", 
                   time="group", col=NULL, 
                   scale=TRUE)

Integration of : mrna prot

Cluster number 1

Working with 129 genes

Cluster number 2

Working with 28 genes

Cluster number 3

Working with 6 genes

Cluster number 4

Working with 18 genes

Cluster number 5

Working with 75 genes

prot_merged = do.call(rbind, df)
prot_merged$name = convertIDs(as.character(prot_merged$gene), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
names(prot_merged) = paste0("prot_", names(prot_merged))

mi_rse = SummarizedExperiment(assays=SimpleList(norm=as.matrix(mirna_matrix)), colData= mirna_col)
gene_rse = SummarizedExperiment(assays=SimpleList(norm=as.matrix(mrna_matrix)), colData=mrna_col)
cor_tar = find_targets(mi_rse, gene_rse, ma)

Dimmension of cor matrix: 121 5851

mapping = melt(cor_tar) %>% filter(value<0)
keep = intersect(mrna_keep, unique(mapping[,1]))
mirna_keep = as.character(unique(mapping[,2]))
df = degMerge(list(mrna=mrna_matrix[keep,],
                   mirna=mirna_matrix[mirna_keep,]), 
              list(mrna=mrna_clusters), 
              list(mrna=mrna_col, mirna=mirna_col), 
              summarize="group", 
              time="group", col=NULL, 
              scale=TRUE, mapping=list(mirna=mapping))

Integration of : mrna mirna

Cluster number 1

Working with 29 genes

Cluster number 4

Working with 12 genes

Cluster number 5

Working with 18 genes

mirna_merged = do.call(rbind, df)
mirna_merged$name = convertIDs(as.character(mirna_merged$pair), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
names(mirna_merged) = paste0("mirna_", names(mirna_merged))

merged = merge(mirna_merged[mirna_merged$mirna_cor < -.6,c(6,7,1:5)], prot_merged[prot_merged$prot_cor > .6,c(5,7,1:4)], by=1, all=TRUE)
merged_all = merge(mirna_merged[c(6,7,1:5)], prot_merged[c(5,7,1:4)], by=1, all=TRUE)

save_file(merged, "omics_filtered.csv", result_files)
save_file(merged_all, "omics.csv", result_files)
save_file(prot_merged, "prot_omics.csv", result_files)
save_file(mirna_merged, "mirna_omics.csv", result_files)

R Session Info

(useful if replicating these results)

sessionInfo()
R version 3.3.0 (2016-05-03)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Debian GNU/Linux stretch/sid

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats4    methods   stats     graphics  grDevices utils    
[8] datasets  base     

other attached packages:
 [1] isomiRs_0.99.18            SummarizedExperiment_1.2.2
 [3] GenomicRanges_1.24.0       GenomeInfoDb_1.8.1        
 [5] DiscriMiner_0.1-29         DEGreport_1.5.0           
 [7] quantreg_5.24              SparseM_1.7               
 [9] org.Mm.eg.db_3.3.0         AnnotationDbi_1.34.3      
[11] IRanges_2.6.0              S4Vectors_0.10.1          
[13] Biobase_2.32.0             BiocGenerics_0.18.0       
[15] logging_0.7-103            gridExtra_2.2.1           
[17] cluster_2.0.4              dplyr_0.4.3               
[19] biomaRt_2.28.0             CHBUtils_0.1              
[21] limma_3.28.5               reshape_0.8.5             
[23] ggplot2_2.1.0              knitr_1.13                
[25] myRfunctions_0.1           rmarkdown_0.9.6           
[27] BiocInstaller_1.22.2      

loaded via a namespace (and not attached):
 [1] tidyr_0.4.1         edgeR_3.14.0        splines_3.3.0      
 [4] gtools_3.5.0        Formula_1.2-1       assertthat_0.1     
 [7] latticeExtra_0.6-28 yaml_2.1.13         RSQLite_1.0.0      
[10] lattice_0.20-33     chron_2.3-47        digest_0.6.9       
[13] RColorBrewer_1.1-2  XVector_0.12.0      colorspace_1.2-6   
[16] htmltools_0.3.5     Matrix_1.2-6        plyr_1.8.3         
[19] DESeq2_1.12.3       XML_3.98-1.4        genefilter_1.54.2  
[22] zlibbioc_1.18.0     xtable_1.8-2        scales_0.4.0       
[25] gdata_2.17.0        BiocParallel_1.6.2  MatrixModels_0.4-1 
[28] annotate_1.50.0     lazyeval_0.1.10     nnet_7.3-12        
[31] survival_2.39-4     magrittr_1.5        evaluate_0.9       
[34] GGally_1.0.1        gplots_3.0.1        foreign_0.8-66     
[37] tools_3.3.0         data.table_1.9.6    formatR_1.4        
[40] stringr_1.0.0       locfit_1.5-9.1      munsell_0.4.3      
[43] caTools_1.17.1      grid_3.3.0          RCurl_1.95-4.8     
[46] labeling_0.3        bitops_1.0-6        codetools_0.2-14   
[49] gtable_0.2.0        DBI_0.4-1           R6_2.1.2           
[52] Nozzle.R1_1.1-1     Hmisc_3.17-4        KernSmooth_2.23-15 
[55] stringi_1.1.1       Rcpp_0.12.5         geneplotter_1.50.0 
[58] rpart_4.1-10        acepack_1.3-3.3     coda_0.18-1